# !conda create -n myenv python=3.9 -y
#!source /root/anaconda3/etc/profile.d/conda.sh
#!conda activate myenv
#!python -m pip install pyyaml==5.1
# Detectron2 has not released pre-built binaries for the latest pytorch (https://github.com/facebookresearch/detectron2/issues/4053)
# so we install from source instead. This takes a few minutes.
#!python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
# Install pre-built detectron2 that matches pytorch version, if released:
# See https://detectron2.readthedocs.io/tutorials/install.html for instructions
#!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/{CUDA_VERSION}/{TORCH_VERSION}/index.html
# exit(0) # After installation, you may need to "restart runtime" in Colab. This line can also restart runtime
# Sanity-check the runtime environment: print the CUDA toolkit, torch, and
# detectron2 versions so mismatched wheels are caught before training.
import torch, detectron2
# IPython shell escape (notebook-only syntax): show the nvcc compiler version.
!nvcc --version
# Major.minor of torch, e.g. "1.9" from "1.9.0+cu111".
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
# CUDA tag from the wheel suffix, e.g. "cu111"; NOTE(review): on a CPU-only
# build there is no "+" suffix and this falls back to the full version string.
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)
nvcc: NVIDIA (R) Cuda compiler driver Copyright (c) 2005-2021 NVIDIA Corporation Built on Thu_Nov_18_09:45:30_PST_2021 Cuda compilation tools, release 11.5, V11.5.119 Build cuda_11.5.r11.5/compiler.30672275_0 torch: 1.9 ; cuda: cu111 detectron2: 0.6
# Some basic setup:
# Setup detectron2 logger so training/inference progress is printed.
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()
# import some common libraries
import numpy as np
import os, json, cv2, random
# cv2_imshow is only needed when running inside Google Colab.
#from google.colab.patches import cv2_imshow
# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
import json

# Load the Label Studio COCO-format export; per the cells below it holds the
# keys 'images', 'categories', 'annotations', and 'info'.
# Use a context manager so the file handle is closed even if json.load raises;
# the original opened the file and never closed it.
with open('/root/raghav/labelstudio/result.json') as f:
    dataset_dicts = json.load(f)
dataset_dicts['images'][1]
{'width': 1535,
'height': 2730,
'id': 1,
'file_name': 'images/11/82c7c2ca-11.jpg'}
dataset_dicts['categories']
[{'id': 0, 'name': 'Wire'}]
dataset_dicts.keys()
dict_keys(['images', 'categories', 'annotations', 'info'])
!ls /root/raghav/labelstudio/images/11
03865ad2-25.jpg 765ef5c1-9.jpg a525426b-24.jpg 0571f85d-4.jpg 7b2945ef-IMG_0340.JPG a7bd461c-8.JPG 060aaccc-IMG_0345.JPG 7e632d99-IMG_0306.JPG a99d9f5b-IMG_0323.JPG 16bee31b-1.jpg 7f355d57-2.JPG ab3b01ca-IMG_0352.JPG 1af4a433-IMG_0354.JPG 7fc61b3a-IMG_0318.JPG abfcc678-IMG_0339.JPG 23c6bf74-5.jpg 82c7c2ca-11.jpg acef4032-IMG_0342.JPG 2f68429a-IMG_0333.JPG 838b8505-IMG_0330.JPG b6e1149a-IMG_0329.JPG 30b24faf-21.jpg 83fd0d53-IMG_0304.JPG bdcfba0e-14.jpg 3576ba5b-20.jpg 859c9222-IMG_0313.JPG bf411d4c-IMG_0294.JPG 39fa94e9-19.jpg 8b4b117d-IMG_0344.JPG cd6ea9ce-IMG_0355.JPG 3a9d0723-IMG_0346.JPG 8b92b444-IMG_0322.JPG ce39d6ee-IMG_0335.JPG 438005ea-IMG_0326.JPG 8cde8e52-3.jpg cfb5b88a-IMG_0301.JPG 4946c5fb-IMG_0331.JPG 8ce904be-IMG_0348.JPG dd70db44-IMG_0343.JPG 4bd013d1-IMG_0303.JPG 8d9688df-17.jpg de03b894-7.jpg 52df14cb-12.jpg 8f5d6a91-IMG_0332.JPG dec01bf9-13.jpg 5b1dbf69-IMG_E0297.JPG 9152cc57-IMG_0349.JPG e0400583-IMG_0347.JPG 5d1b016a-IMG_0336.JPG 9259c363-IMG_0311.JPG e1a5c9ad-IMG_0312.JPG 5fae8912-IMG_0338.JPG 9505cd1e-IMG_0299.JPG e3423fba-IMG_0308.JPG 60708a60-IMG_0316.JPG 96c90a35-16.jpg e4f2229d-IMG_0337.JPG 62c4d761-IMG_0334.JPG 9a34f8a1-IMG_0320.JPG e73b6357-10.jpg 659fbde8-18.jpg 9f7b7a1f-IMG_0298.JPG ed89dda7-IMG_0321.JPG 69db667b-15.JPG a1a4c73b-IMG_0295.JPG f2724ae4-IMG_0319.JPG 6cf19201-IMG_0309.JPG a29d1440-23.jpg f2b94273-IMG_0350.JPG 6d9d2d6f-IMG_0359.JPG a2f9206c-IMG_0353.JPG f66ef193-22.jpg 74f14799-IMG_0300.JPG a3bb42c6-IMG_0302.JPG fbbba93f-6.JPG
from detectron2.data.datasets import register_coco_instances
# Register the Label Studio export as a COCO-format dataset named "my_dataset8".
# Arguments: dataset name, extra metadata (none), path to the annotation JSON,
# and the image root directory ('file_name' entries in the JSON are relative to it).
register_coco_instances("my_dataset8", {}, "/root/raghav/labelstudio/result.json", "/root/raghav/labelstudio/")
#register_coco_instances("my_dataset_val", {}, "json_annotation_val.json", "path/to/image/dir")
# if your dataset is in COCO format, this cell can be replaced by the following three lines:
# from detectron2.data.datasets import register_coco_instances
# register_coco_instances("my_dataset_train", {}, "json_annotation_train.json", "path/to/image/dir")
# register_coco_instances("my_dataset_val", {}, "json_annotation_val.json", "path/to/image/dir")
from detectron2.structures import BoxMode
Now, let's fine-tune a COCO-pretrained R101-FPN Mask R-CNN model on the wire dataset. Training 1000 iterations takes a few minutes on a P100 GPU.
from detectron2.engine import DefaultTrainer
# Build the training configuration from the Mask R-CNN R101-FPN 3x COCO baseline.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("my_dataset8",)
# NOTE(review): TEST is the same split as TRAIN, so any evaluation is on training data.
cfg.DATASETS.TEST = ("my_dataset8",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml") # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2 # This is the real "batch size" commonly known to deep learning people
cfg.SOLVER.BASE_LR = 0.00025 # pick a good LR
cfg.SOLVER.MAX_ITER = 1000 # 1000 iterations for this small dataset; you will need to train longer for a practical dataset
cfg.SOLVER.STEPS = [] # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128 # The "RoIHead batch size". 128 is faster, and good enough for this toy dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1 # only one class ("Wire"). (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)
# NOTE: this config value is the number of classes; a few popular unofficial tutorials incorrectly use num_classes+1 here.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
[01/03 13:32:01 d2.engine.defaults]: Model: GeneralizedRCNN( (backbone): FPN( (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1)) (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1)) (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1)) (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1)) (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (top_block): LastLevelMaxPool() (bottom_up): ResNet( (stem): BasicStem( (conv1): Conv2d( 3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) ) (res2): Sequential( (0): BottleneckBlock( (shortcut): Conv2d( 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv1): Conv2d( 64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv2): Conv2d( 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv3): Conv2d( 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) ) (1): BottleneckBlock( (conv1): Conv2d( 256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv2): Conv2d( 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv3): Conv2d( 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) ) (2): BottleneckBlock( (conv1): Conv2d( 256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False 
(norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv2): Conv2d( 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv3): Conv2d( 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) ) ) (res3): Sequential( (0): BottleneckBlock( (shortcut): Conv2d( 256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv1): Conv2d( 256, 128, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv2): Conv2d( 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv3): Conv2d( 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) (1): BottleneckBlock( (conv1): Conv2d( 512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv2): Conv2d( 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv3): Conv2d( 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) (2): BottleneckBlock( (conv1): Conv2d( 512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv2): Conv2d( 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv3): Conv2d( 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) (3): BottleneckBlock( (conv1): Conv2d( 512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv2): Conv2d( 128, 128, kernel_size=(3, 3), stride=(1, 1), 
padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv3): Conv2d( 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) ) (res4): Sequential( (0): BottleneckBlock( (shortcut): Conv2d( 512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv1): Conv2d( 512, 256, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (1): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (2): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (3): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, 
kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (4): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (5): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (6): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (7): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (8): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): 
Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (9): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (10): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (11): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (12): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, 
eps=1e-05) ) ) (13): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (14): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (15): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (16): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (17): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False 
(norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (18): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (19): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (20): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (21): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (22): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), 
stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) ) (res5): Sequential( (0): BottleneckBlock( (shortcut): Conv2d( 1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) (conv1): Conv2d( 1024, 512, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv2): Conv2d( 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv3): Conv2d( 512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) ) (1): BottleneckBlock( (conv1): Conv2d( 2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv2): Conv2d( 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv3): Conv2d( 512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) ) (2): BottleneckBlock( (conv1): Conv2d( 2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv2): Conv2d( 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv3): Conv2d( 512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) ) ) ) ) (proposal_generator): RPN( (rpn_head): StandardRPNHead( (conv): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1) (activation): ReLU() ) 
(objectness_logits): Conv2d(256, 3, kernel_size=(1, 1), stride=(1, 1)) (anchor_deltas): Conv2d(256, 12, kernel_size=(1, 1), stride=(1, 1)) ) (anchor_generator): DefaultAnchorGenerator( (cell_anchors): BufferList() ) ) (roi_heads): StandardROIHeads( (box_pooler): ROIPooler( (level_poolers): ModuleList( (0): ROIAlign(output_size=(7, 7), spatial_scale=0.25, sampling_ratio=0, aligned=True) (1): ROIAlign(output_size=(7, 7), spatial_scale=0.125, sampling_ratio=0, aligned=True) (2): ROIAlign(output_size=(7, 7), spatial_scale=0.0625, sampling_ratio=0, aligned=True) (3): ROIAlign(output_size=(7, 7), spatial_scale=0.03125, sampling_ratio=0, aligned=True) ) ) (box_head): FastRCNNConvFCHead( (flatten): Flatten(start_dim=1, end_dim=-1) (fc1): Linear(in_features=12544, out_features=1024, bias=True) (fc_relu1): ReLU() (fc2): Linear(in_features=1024, out_features=1024, bias=True) (fc_relu2): ReLU() ) (box_predictor): FastRCNNOutputLayers( (cls_score): Linear(in_features=1024, out_features=2, bias=True) (bbox_pred): Linear(in_features=1024, out_features=4, bias=True) ) (mask_pooler): ROIPooler( (level_poolers): ModuleList( (0): ROIAlign(output_size=(14, 14), spatial_scale=0.25, sampling_ratio=0, aligned=True) (1): ROIAlign(output_size=(14, 14), spatial_scale=0.125, sampling_ratio=0, aligned=True) (2): ROIAlign(output_size=(14, 14), spatial_scale=0.0625, sampling_ratio=0, aligned=True) (3): ROIAlign(output_size=(14, 14), spatial_scale=0.03125, sampling_ratio=0, aligned=True) ) ) (mask_head): MaskRCNNConvUpsampleHead( (mask_fcn1): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1) (activation): ReLU() ) (mask_fcn2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1) (activation): ReLU() ) (mask_fcn3): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1) (activation): ReLU() ) (mask_fcn4): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1) (activation): ReLU() ) (deconv): ConvTranspose2d(256, 256, kernel_size=(2, 2), 
stride=(2, 2)) (deconv_relu): ReLU() (predictor): Conv2d(256, 1, kernel_size=(1, 1), stride=(1, 1)) ) ) ) WARNING [01/03 13:32:01 d2.data.datasets.coco]: Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you. [01/03 13:32:01 d2.data.datasets.coco]: Loaded 75 images in COCO format from /root/raghav/labelstudio/result.json [01/03 13:32:01 d2.data.build]: Removed 8 images with no usable annotations. 67 images left. [01/03 13:32:01 d2.data.build]: Distribution of instances among all 1 categories: | category | #instances | |:----------:|:-------------| | Wire | 107 | | | | [01/03 13:32:01 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in training: [ResizeShortestEdge(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style='choice'), RandomFlip()] [01/03 13:32:01 d2.data.build]: Using training sampler TrainingSampler [01/03 13:32:01 d2.data.common]: Serializing 67 elements to byte tensors and concatenating them all ... [01/03 13:32:01 d2.data.common]: Serialized dataset takes 0.05 MiB
2023-01-03 13:32:01.913345: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-01-03 13:32:03.289056: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-01-03 13:32:05.476628: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /root/anaconda3/lib/python3.9/site-packages/cv2/../../lib64:
2023-01-03 13:32:05.476749: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /root/anaconda3/lib/python3.9/site-packages/cv2/../../lib64:
2023-01-03 13:32:05.476762: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.
/root/anaconda3/lib/python3.9/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.23.0
warnings.warn(f"A NumPy version >={np_minversion} and <{np_maxversion}"
Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (2, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (2,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (80, 256, 1, 1) in the checkpoint but (1, 256, 1, 1) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.bias' to the model due to incompatible shapes: (80,) in the checkpoint but (1,) in the model! You might want to double check if this is expected.
Some model parameters or buffers are not found in the checkpoint:
roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, weight}
roi_heads.mask_head.predictor.{bias, weight}
#trainer.train()
# Look at training curves in tensorboard:
#%load_ext tensorboard
#%tensorboard --logdir output
Now, let's run inference with the trained model on the wire dataset. First, let's create a predictor using the weights saved in `cfg.OUTPUT_DIR`:
cfg.OUTPUT_DIR
'./output'
from detectron2.utils.visualizer import ColorMode
import matplotlib.pyplot as plt
from skimage.io import imshow, imread
from skimage.color import rgb2hsv, hsv2rgb

# Pick one image path from the COCO export and load it for a visual spot-check.
k=dataset_dicts['images'][5]['file_name']
im = cv2.imread("/root/raghav/labelstudio/"+k,1)  # flag 1 = load as 3-channel color (BGR)
# Fix: cv2.imread returns BGR, but skimage's matplotlib-backed imshow expects
# RGB — without the conversion the red and blue channels display swapped.
imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
<matplotlib.image.AxesImage at 0x7f19dd4e24f0>
#im1=imshow("/root/raghav/labelstudio/wires/images (7).jpeg")
# Inference should use the config with parameters that are used in training
# cfg now already contains everything we've set previously. We changed it a little bit for inference:
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5   # set a custom testing threshold
# DefaultPredictor loads the weights and applies the model to single BGR images.
predictor = DefaultPredictor(cfg)
[01/03 13:33:19 d2.checkpoint.c2_model_loading]: Following weights matched with model:
| Names in Model | Names in Checkpoint | Shapes |
|:------------------------------------------------|:-----------------------------------------------------------------------------------------------------|:------------------------------------------------|
| backbone.bottom_up.res2.0.conv1.* | backbone.bottom_up.res2.0.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,64,1,1) |
| backbone.bottom_up.res2.0.conv2.* | backbone.bottom_up.res2.0.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,64,3,3) |
| backbone.bottom_up.res2.0.conv3.* | backbone.bottom_up.res2.0.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,64,1,1) |
| backbone.bottom_up.res2.0.shortcut.* | backbone.bottom_up.res2.0.shortcut.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,64,1,1) |
| backbone.bottom_up.res2.1.conv1.* | backbone.bottom_up.res2.1.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,256,1,1) |
| backbone.bottom_up.res2.1.conv2.* | backbone.bottom_up.res2.1.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,64,3,3) |
| backbone.bottom_up.res2.1.conv3.* | backbone.bottom_up.res2.1.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,64,1,1) |
| backbone.bottom_up.res2.2.conv1.* | backbone.bottom_up.res2.2.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,256,1,1) |
| backbone.bottom_up.res2.2.conv2.* | backbone.bottom_up.res2.2.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,64,3,3) |
| backbone.bottom_up.res2.2.conv3.* | backbone.bottom_up.res2.2.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,64,1,1) |
| backbone.bottom_up.res3.0.conv1.* | backbone.bottom_up.res3.0.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,256,1,1) |
| backbone.bottom_up.res3.0.conv2.* | backbone.bottom_up.res3.0.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,128,3,3) |
| backbone.bottom_up.res3.0.conv3.* | backbone.bottom_up.res3.0.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,128,1,1) |
| backbone.bottom_up.res3.0.shortcut.* | backbone.bottom_up.res3.0.shortcut.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,256,1,1) |
| backbone.bottom_up.res3.1.conv1.* | backbone.bottom_up.res3.1.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,512,1,1) |
| backbone.bottom_up.res3.1.conv2.* | backbone.bottom_up.res3.1.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,128,3,3) |
| backbone.bottom_up.res3.1.conv3.* | backbone.bottom_up.res3.1.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,128,1,1) |
| backbone.bottom_up.res3.2.conv1.* | backbone.bottom_up.res3.2.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,512,1,1) |
| backbone.bottom_up.res3.2.conv2.* | backbone.bottom_up.res3.2.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,128,3,3) |
| backbone.bottom_up.res3.2.conv3.* | backbone.bottom_up.res3.2.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,128,1,1) |
| backbone.bottom_up.res3.3.conv1.* | backbone.bottom_up.res3.3.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,512,1,1) |
| backbone.bottom_up.res3.3.conv2.* | backbone.bottom_up.res3.3.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,128,3,3) |
| backbone.bottom_up.res3.3.conv3.* | backbone.bottom_up.res3.3.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,128,1,1) |
| backbone.bottom_up.res4.0.conv1.* | backbone.bottom_up.res4.0.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,512,1,1) |
| backbone.bottom_up.res4.0.conv2.* | backbone.bottom_up.res4.0.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.0.conv3.* | backbone.bottom_up.res4.0.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.0.shortcut.* | backbone.bottom_up.res4.0.shortcut.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,512,1,1) |
| backbone.bottom_up.res4.1.conv1.* | backbone.bottom_up.res4.1.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.1.conv2.* | backbone.bottom_up.res4.1.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.1.conv3.* | backbone.bottom_up.res4.1.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.10.conv1.* | backbone.bottom_up.res4.10.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.10.conv2.* | backbone.bottom_up.res4.10.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.10.conv3.* | backbone.bottom_up.res4.10.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.11.conv1.* | backbone.bottom_up.res4.11.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.11.conv2.* | backbone.bottom_up.res4.11.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.11.conv3.* | backbone.bottom_up.res4.11.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.12.conv1.* | backbone.bottom_up.res4.12.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.12.conv2.* | backbone.bottom_up.res4.12.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.12.conv3.* | backbone.bottom_up.res4.12.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.13.conv1.* | backbone.bottom_up.res4.13.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.13.conv2.* | backbone.bottom_up.res4.13.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.13.conv3.* | backbone.bottom_up.res4.13.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.14.conv1.* | backbone.bottom_up.res4.14.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.14.conv2.* | backbone.bottom_up.res4.14.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.14.conv3.* | backbone.bottom_up.res4.14.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.15.conv1.* | backbone.bottom_up.res4.15.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.15.conv2.* | backbone.bottom_up.res4.15.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.15.conv3.* | backbone.bottom_up.res4.15.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.16.conv1.* | backbone.bottom_up.res4.16.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.16.conv2.* | backbone.bottom_up.res4.16.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.16.conv3.* | backbone.bottom_up.res4.16.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.17.conv1.* | backbone.bottom_up.res4.17.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.17.conv2.* | backbone.bottom_up.res4.17.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.17.conv3.* | backbone.bottom_up.res4.17.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.18.conv1.* | backbone.bottom_up.res4.18.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.18.conv2.* | backbone.bottom_up.res4.18.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.18.conv3.* | backbone.bottom_up.res4.18.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.19.conv1.* | backbone.bottom_up.res4.19.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.19.conv2.* | backbone.bottom_up.res4.19.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.19.conv3.* | backbone.bottom_up.res4.19.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.2.conv1.* | backbone.bottom_up.res4.2.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.2.conv2.* | backbone.bottom_up.res4.2.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.2.conv3.* | backbone.bottom_up.res4.2.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.20.conv1.* | backbone.bottom_up.res4.20.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.20.conv2.* | backbone.bottom_up.res4.20.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.20.conv3.* | backbone.bottom_up.res4.20.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.21.conv1.* | backbone.bottom_up.res4.21.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.21.conv2.* | backbone.bottom_up.res4.21.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.21.conv3.* | backbone.bottom_up.res4.21.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.22.conv1.* | backbone.bottom_up.res4.22.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.22.conv2.* | backbone.bottom_up.res4.22.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.22.conv3.* | backbone.bottom_up.res4.22.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.3.conv1.* | backbone.bottom_up.res4.3.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.3.conv2.* | backbone.bottom_up.res4.3.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.3.conv3.* | backbone.bottom_up.res4.3.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.4.conv1.* | backbone.bottom_up.res4.4.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.4.conv2.* | backbone.bottom_up.res4.4.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.4.conv3.* | backbone.bottom_up.res4.4.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.5.conv1.* | backbone.bottom_up.res4.5.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.5.conv2.* | backbone.bottom_up.res4.5.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.5.conv3.* | backbone.bottom_up.res4.5.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.6.conv1.* | backbone.bottom_up.res4.6.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.6.conv2.* | backbone.bottom_up.res4.6.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.6.conv3.* | backbone.bottom_up.res4.6.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.7.conv1.* | backbone.bottom_up.res4.7.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.7.conv2.* | backbone.bottom_up.res4.7.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.7.conv3.* | backbone.bottom_up.res4.7.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.8.conv1.* | backbone.bottom_up.res4.8.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.8.conv2.* | backbone.bottom_up.res4.8.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.8.conv3.* | backbone.bottom_up.res4.8.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.9.conv1.* | backbone.bottom_up.res4.9.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.9.conv2.* | backbone.bottom_up.res4.9.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.9.conv3.* | backbone.bottom_up.res4.9.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res5.0.conv1.* | backbone.bottom_up.res5.0.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,1024,1,1) |
| backbone.bottom_up.res5.0.conv2.* | backbone.bottom_up.res5.0.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,512,3,3) |
| backbone.bottom_up.res5.0.conv3.* | backbone.bottom_up.res5.0.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (2048,) (2048,) (2048,) (2048,) (2048,512,1,1) |
| backbone.bottom_up.res5.0.shortcut.* | backbone.bottom_up.res5.0.shortcut.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (2048,) (2048,) (2048,) (2048,) (2048,1024,1,1) |
| backbone.bottom_up.res5.1.conv1.* | backbone.bottom_up.res5.1.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,2048,1,1) |
| backbone.bottom_up.res5.1.conv2.* | backbone.bottom_up.res5.1.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,512,3,3) |
| backbone.bottom_up.res5.1.conv3.* | backbone.bottom_up.res5.1.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (2048,) (2048,) (2048,) (2048,) (2048,512,1,1) |
| backbone.bottom_up.res5.2.conv1.* | backbone.bottom_up.res5.2.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,2048,1,1) |
| backbone.bottom_up.res5.2.conv2.* | backbone.bottom_up.res5.2.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,512,3,3) |
| backbone.bottom_up.res5.2.conv3.* | backbone.bottom_up.res5.2.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (2048,) (2048,) (2048,) (2048,) (2048,512,1,1) |
| backbone.bottom_up.stem.conv1.* | backbone.bottom_up.stem.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,3,7,7) |
| backbone.fpn_lateral2.* | backbone.fpn_lateral2.{bias,weight} | (256,) (256,256,1,1) |
| backbone.fpn_lateral3.* | backbone.fpn_lateral3.{bias,weight} | (256,) (256,512,1,1) |
| backbone.fpn_lateral4.* | backbone.fpn_lateral4.{bias,weight} | (256,) (256,1024,1,1) |
| backbone.fpn_lateral5.* | backbone.fpn_lateral5.{bias,weight} | (256,) (256,2048,1,1) |
| backbone.fpn_output2.* | backbone.fpn_output2.{bias,weight} | (256,) (256,256,3,3) |
| backbone.fpn_output3.* | backbone.fpn_output3.{bias,weight} | (256,) (256,256,3,3) |
| backbone.fpn_output4.* | backbone.fpn_output4.{bias,weight} | (256,) (256,256,3,3) |
| backbone.fpn_output5.* | backbone.fpn_output5.{bias,weight} | (256,) (256,256,3,3) |
| proposal_generator.rpn_head.anchor_deltas.* | proposal_generator.rpn_head.anchor_deltas.{bias,weight} | (12,) (12,256,1,1) |
| proposal_generator.rpn_head.conv.* | proposal_generator.rpn_head.conv.{bias,weight} | (256,) (256,256,3,3) |
| proposal_generator.rpn_head.objectness_logits.* | proposal_generator.rpn_head.objectness_logits.{bias,weight} | (3,) (3,256,1,1) |
| roi_heads.box_head.fc1.* | roi_heads.box_head.fc1.{bias,weight} | (1024,) (1024,12544) |
| roi_heads.box_head.fc2.* | roi_heads.box_head.fc2.{bias,weight} | (1024,) (1024,1024) |
| roi_heads.box_predictor.bbox_pred.* | roi_heads.box_predictor.bbox_pred.{bias,weight} | (4,) (4,1024) |
| roi_heads.box_predictor.cls_score.* | roi_heads.box_predictor.cls_score.{bias,weight} | (2,) (2,1024) |
| roi_heads.mask_head.deconv.* | roi_heads.mask_head.deconv.{bias,weight} | (256,) (256,256,2,2) |
| roi_heads.mask_head.mask_fcn1.* | roi_heads.mask_head.mask_fcn1.{bias,weight} | (256,) (256,256,3,3) |
| roi_heads.mask_head.mask_fcn2.* | roi_heads.mask_head.mask_fcn2.{bias,weight} | (256,) (256,256,3,3) |
| roi_heads.mask_head.mask_fcn3.* | roi_heads.mask_head.mask_fcn3.{bias,weight} | (256,) (256,256,3,3) |
| roi_heads.mask_head.mask_fcn4.* | roi_heads.mask_head.mask_fcn4.{bias,weight} | (256,) (256,256,3,3) |
| roi_heads.mask_head.predictor.* | roi_heads.mask_head.predictor.{bias,weight} | (1,) (1,256,1,1) |
Then, we randomly select several samples to visualize the prediction results.
def get_mask(im):
    """Run the detector on *im* and return the highest-scoring box.

    Args:
        im: BGR image array (H, W, 3), as returned by ``cv2.imread``.

    Returns:
        list[int]: ``[y1, y2, x1, x2]`` of the top-scoring predicted box
        (coordinates truncated to ints).

    Raises:
        ValueError: if the model predicts no instances for *im*.
    """
    outputs = predictor(im)
    instances = outputs['instances']
    # Convert scores to a Python list once (the original converted twice).
    scores = instances.scores.tolist()
    if not scores:
        # max() on an empty list would raise an opaque ValueError anyway;
        # make the failure mode explicit for callers.
        raise ValueError("no instances predicted for image")
    max_index = scores.index(max(scores))
    # Extract the box tensor once instead of re-converting per coordinate.
    x1, y1, x2, y2 = instances.pred_boxes.tensor.tolist()[max_index]
    # Same diagnostic line as before: y2 y1 x2 x1 and the box area.
    print(y2, y1, x2, x1, (y2 - y1) * (x2 - x1))
    return [int(y1), int(y2), int(x1), int(x2)]
im1 = get_mask(im)
/root/anaconda3/lib/python3.9/site-packages/torch/nn/functional.py:718: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at /pytorch/c10/core/TensorImpl.h:1156.) return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
1468.1954345703125 1293.2349853515625 1364.9661865234375 1215.608642578125 26131.66298288107
%%time
#-- 5-10 seconds ## Loading to prediction to output
# Re-read the sample with skimage (RGB order, unlike cv2's BGR — the
# [:, :, ::-1] below flips channels for the Visualizer).
im = imread("/root/raghav/labelstudio/"+k)
outputs = predictor(im) # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
v = Visualizer(im[:, :, ::-1],
scale=0.75,
# remove the colors of unsegmented pixels. This option is only available for segmentation models
)
# Move predictions to CPU before drawing the overlay.
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
CPU times: user 1.3 s, sys: 130 ms, total: 1.43 s Wall time: 1.3 s
# Map each dataset file name to its top-detection box [y1, y2, x1, x2].
box_dict = {}
for entry in dataset_dicts['images']:
    k = entry['file_name']
    im = cv2.imread("/root/raghav/labelstudio/" + k)
    try:
        a = get_mask(im)
        box_dict[k] = a
        print(k, a)
    except Exception as e:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate, and report skipped images instead of silently
        # dropping them (e.g. unreadable files or images with no detections).
        print(f"skipping {k}: {e}")
2213.11376953125 1993.5338134765625 830.9368896484375 492.2952880859375 74358.90798938274 images/11/bdcfba0e-14.jpg [1993, 2213, 492, 830] 1363.915771484375 1173.44189453125 895.8938598632812 744.4111328125 28853.502312794328 images/11/82c7c2ca-11.jpg [1173, 1363, 744, 895] 2331.93896484375 1381.3143310546875 2074.70458984375 1302.9000244140625 733696.4323683232 images/11/7f355d57-2.JPG [1381, 2331, 1302, 2074] 2240.393310546875 2084.628662109375 1066.9970703125 852.8511962890625 33356.356781601906 images/11/03865ad2-25.jpg [2084, 2240, 852, 1066] 2606.329345703125 2562.671875 1242.3629150390625 1044.86328125 8622.334476023912 images/11/a525426b-24.jpg [2562, 2606, 1044, 1242] 1468.1954345703125 1293.2349853515625 1364.9661865234375 1215.608642578125 26131.66298288107 images/11/a29d1440-23.jpg [1293, 1468, 1215, 1364] 219.8627166748047 178.30796813964844 95.97936248779297 44.45733642578125 2140.9848370286636 images/11/f66ef193-22.jpg [178, 219, 44, 95] 2941.13623046875 2716.9697265625 986.3261108398438 761.4359130859375 50412.84939327836 images/11/30b24faf-21.jpg [2716, 2941, 761, 986] 677.9794311523438 659.8280639648438 513.8724365234375 437.2713317871094 1390.4147790372372 images/11/3576ba5b-20.jpg [659, 677, 437, 513] 2639.767822265625 2587.56787109375 1023.1159057617188 715.7958374023438 16042.092562496662 images/11/39fa94e9-19.jpg [2587, 2639, 715, 1023] 2019.7305908203125 1840.5123291015625 1615.9908447265625 1070.1298828125 97828.25273436308 images/11/659fbde8-18.jpg [1840, 2019, 1070, 1615]
Invalid SOS parameters for sequential JPEG
700.133056640625 466.6375427246094 357.508544921875 192.16351318359375 38607.323159189895 images/11/8d9688df-17.jpg [466, 700, 192, 357] 874.297607421875 789.6838989257812 379.7408142089844 239.175537109375 11893.74938117899 images/11/96c90a35-16.jpg [789, 874, 239, 379] 1686.704833984375 1571.3988037109375 1483.6629638671875 1255.39990234375 26320.10748232901 images/11/69db667b-15.JPG [1571, 1686, 1255, 1483] 1818.5491943359375 1453.2490234375 1711.244384765625 634.609619140625 393294.8638780117 images/11/dec01bf9-13.jpg [1453, 1818, 634, 1711] 651.0650634765625 590.3082885742188 497.3738708496094 304.7451477050781 11703.499971818179 images/11/52df14cb-12.jpg [590, 651, 304, 497] 838.8355712890625 776.0136108398438 432.61199951171875 317.3257141113281 7242.510461760685 images/11/e73b6357-10.jpg [776, 838, 317, 432] 459.82696533203125 301.2104187011719 429.2563781738281 168.19210815429688 41409.11295920424 images/11/765ef5c1-9.jpg [301, 459, 168, 429] 2362.1552734375 2119.595703125 1963.114501953125 1738.6917724609375 54435.88083398342 images/11/a7bd461c-8.JPG [2119, 2362, 1738, 1963] 2068.41015625 1846.001708984375 1182.108154296875 987.1857299804688 43352.39372946322 images/11/de03b894-7.jpg [1846, 2068, 987, 1182] 2530.751220703125 2101.198486328125 2005.325927734375 748.83349609375 539729.7597327232 images/11/fbbba93f-6.JPG [2101, 2530, 748, 2005] 5233.74072265625 5162.193359375 3329.501708984375 2801.6298828125 37767.83731305599 images/11/23c6bf74-5.jpg [5162, 5233, 2801, 3329] 1832.986328125 1202.7701416015625 2418.65869140625 722.2001953125 1069135.604003489 images/11/0571f85d-4.jpg [1202, 1832, 722, 2418] 2204.8984375 2120.737548828125 1556.328125 1272.4105224609375 23894.757739275694 images/11/8cde8e52-3.jpg [2120, 2204, 1272, 1556] 1571.1124267578125 1395.929443359375 1528.3304443359375 835.5157470703125 121369.34560927749 images/11/16bee31b-1.jpg [1395, 1571, 835, 1528] 2421.9970703125 2291.085205078125 1170.643310546875 914.7128295898438 
33504.33663241565 images/11/dd70db44-IMG_0343.JPG [2291, 2421, 914, 1170] 2291.00048828125 2222.3349609375 1423.87548828125 1208.0472412109375 14819.960400760174 images/11/ce39d6ee-IMG_0335.JPG [2222, 2291, 1208, 1423] 2461.2373046875 2374.244140625 1362.6220703125 1156.7894287109375 17906.03276026249 images/11/acef4032-IMG_0342.JPG [2374, 2461, 1156, 1362] 2420.017333984375 2333.370361328125 1094.765625 955.7530517578125 12045.01863259077 images/11/3a9d0723-IMG_0346.JPG [2333, 2420, 955, 1094] 2264.532470703125 2109.755859375 1176.8778076171875 955.6858520507812 34235.341335609555 images/11/060aaccc-IMG_0345.JPG [2109, 2264, 955, 1176] 2844.13427734375 2744.190673828125 1549.469970703125 1308.5311279296875 24080.296173661947 images/11/e0400583-IMG_0347.JPG [2744, 2844, 1308, 1549] 2548.83203125 2372.49365234375 1235.7537841796875 1201.0631103515625 6117.297186017036 images/11/5d1b016a-IMG_0336.JPG [2372, 2548, 1201, 1235] 2362.9150390625 2179.443115234375 1196.54736328125 808.3644409179688 71220.6675632149 images/11/abfcc678-IMG_0339.JPG [2179, 2362, 808, 1196] 2897.173583984375 2782.49951171875 1304.4310302734375 1115.6552734375 21647.68478140235 images/11/9152cc57-IMG_0349.JPG [2782, 2897, 1115, 1304] 2178.522216796875 2076.779541015625 1245.202392578125 1092.352294921875 15551.377928972244 images/11/5fae8912-IMG_0338.JPG [2076, 2178, 1092, 1245] 2412.2109375 2342.7880859375 1161.607421875 936.3329467773438 15639.196445524693 images/11/4946c5fb-IMG_0331.JPG [2342, 2412, 936, 1161] 2663.8896484375 2582.580322265625 1501.8231201171875 1337.578125 13354.649880081415 images/11/cd6ea9ce-IMG_0355.JPG [2582, 2663, 1337, 1501] 2500.86376953125 2378.303466796875 1399.1336669921875 1174.580078125 27521.35583165288 images/11/7b2945ef-IMG_0340.JPG [2378, 2500, 1174, 1399] 2623.43359375 2518.29931640625 1416.4542236328125 1140.50537109375 29011.68319553137 images/11/8b4b117d-IMG_0344.JPG [2518, 2623, 1140, 1416] 3071.157958984375 2982.8095703125 1514.810791015625 
1320.9766845703125 17124.93097409606 images/11/a2f9206c-IMG_0353.JPG [2982, 3071, 1320, 1514] 2640.319091796875 2424.7216796875 1115.467529296875 1050.1514892578125 14081.969201654196 images/11/e4f2229d-IMG_0337.JPG [2424, 2640, 1050, 1115] 2666.634033203125 2564.0966796875 1476.0267333984375 1315.259765625 16484.619408220053 images/11/1af4a433-IMG_0354.JPG [2564, 2666, 1315, 1476] 2295.868896484375 2175.04541015625 1271.450439453125 1053.2735595703125 26360.891263633966 images/11/8f5d6a91-IMG_0332.JPG [2175, 2295, 1053, 1271] 2162.60546875 1843.21630859375 1079.6893310546875 856.0884399414062 71415.70082286 images/11/b6e1149a-IMG_0329.JPG [1843, 2162, 856, 1079] 2673.288818359375 2581.06787109375 1232.3536376953125 1109.6611328125 11314.81902268529 images/11/6d9d2d6f-IMG_0359.JPG [2581, 2673, 1109, 1232] 2560.627685546875 2424.855712890625 1428.484130859375 1202.532958984375 30677.836329460144 images/11/8ce904be-IMG_0348.JPG [2424, 2560, 1202, 1428] 2360.52734375 2250.020751953125 823.954345703125 709.2855224609375 12671.660841852427 images/11/438005ea-IMG_0326.JPG [2250, 2360, 709, 823] 2639.96337890625 2552.760986328125 1185.3531494140625 1029.632080078125 13579.249820917845 images/11/ed89dda7-IMG_0321.JPG [2552, 2639, 1029, 1185] 2366.402099609375 2242.737548828125 1260.3187255859375 1159.99072265625 12407.017413079739 images/11/8b92b444-IMG_0322.JPG [2242, 2366, 1159, 1260] 2312.3779296875 2202.75732421875 1507.800537109375 1363.369873046875 15832.576842784882 images/11/f2b94273-IMG_0350.JPG [2202, 2312, 1363, 1507] 2284.098876953125 2183.12939453125 1217.2496337890625 1069.9322509765625 14874.559894323349 images/11/a99d9f5b-IMG_0323.JPG [2183, 2284, 1069, 1217] 2510.920166015625 2466.939208984375 1255.4263916015625 1028.3677978515625 9986.2542552948 images/11/2f68429a-IMG_0333.JPG [2466, 2510, 1028, 1255] 2524.831787109375 2452.073486328125 1243.888671875 999.2626342773438 17798.574822455645 images/11/62c4d761-IMG_0334.JPG [2452, 2524, 999, 1243] 
2609.711669921875 2428.610107421875 1509.612548828125 1422.8714599609375 15708.946726799011 images/11/9a34f8a1-IMG_0320.JPG [2428, 2609, 1422, 1509] 2455.18310546875 2239.640869140625 1230.784912109375 1044.853271484375 40076.12162446976 images/11/838b8505-IMG_0330.JPG [2239, 2455, 1044, 1230] 2781.961669921875 2687.9482421875 1193.9986572265625 1006.627197265625 17615.433210521936 images/11/f2724ae4-IMG_0319.JPG [2687, 2781, 1006, 1193] 2901.995361328125 2736.460205078125 1052.5987548828125 909.5809326171875 23674.477555274963 images/11/e1a5c9ad-IMG_0312.JPG [2736, 2901, 909, 1052] 2775.4765625 2449.23095703125 1285.741455078125 1147.0177001953125 45258.01540464163 images/11/9259c363-IMG_0311.JPG [2449, 2775, 1147, 1285] 2043.6812744140625 1712.791748046875 1058.3525390625 858.6241455078125 66088.03354538977 images/11/7e632d99-IMG_0306.JPG [1712, 2043, 858, 1058] 2407.583251953125 2071.169677734375 1439.5577392578125 1090.9072265625 117290.76512902975 images/11/cfb5b88a-IMG_0301.JPG [2071, 2407, 1090, 1439] 2286.68017578125 2024.009033203125 1225.962646484375 1172.255126953125 14107.415520310402 images/11/60708a60-IMG_0316.JPG [2024, 2286, 1172, 1225] 2693.58154296875 2096.812255859375 1313.10888671875 1065.4344482421875 147804.49808487296 images/11/e3423fba-IMG_0308.JPG [2096, 2693, 1065, 1313] 2766.928955078125 2504.139892578125 1445.3353271484375 1165.5391845703125 73527.3659992218 images/11/9505cd1e-IMG_0299.JPG [2504, 2766, 1165, 1445] 2517.968017578125 2444.506591796875 1361.67626953125 1158.127685546875 14952.969195246696 images/11/7fc61b3a-IMG_0318.JPG [2444, 2517, 1158, 1361] 2673.554931640625 2386.7216796875 1210.613037109375 1152.922119140625 16547.673609137535 images/11/859c9222-IMG_0313.JPG [2386, 2673, 1152, 1210] 2849.074951171875 2550.048583984375 1401.039794921875 1093.5897216796875 91935.67849314213 images/11/4bd013d1-IMG_0303.JPG [2550, 2849, 1093, 1401] 2191.798095703125 2101.673828125 1240.1917724609375 1063.435302734375 15930.04737380147 
images/11/a1a4c73b-IMG_0295.JPG [2101, 2191, 1063, 1240] 2809.42724609375 2504.707763671875 1345.116455078125 1024.350341796875 97743.68401753902 images/11/74f14799-IMG_0300.JPG [2504, 2809, 1024, 1345] 2627.28173828125 2445.81884765625 1179.113525390625 876.8929443359375 54841.820244550705 images/11/bf411d4c-IMG_0294.JPG [2445, 2627, 876, 1179] 2829.277099609375 2642.719482421875 1390.011474609375 1118.26611328125 50696.16709113121 images/11/a3bb42c6-IMG_0302.JPG [2642, 2829, 1118, 1390] 2691.045166015625 2624.855712890625 1338.711669921875 1152.5341796875 12322.98626279831 images/11/6cf19201-IMG_0309.JPG [2624, 2691, 1152, 1338] 2309.70166015625 2118.62158203125 867.4765625 660.2514038085938 39596.59951221943 images/11/5b1dbf69-IMG_E0297.JPG [2118, 2309, 660, 867] 2851.448974609375 2604.238525390625 1466.26806640625 1299.998779296875 41103.50515758991 images/11/83fd0d53-IMG_0304.JPG [2604, 2851, 1299, 1466] 2720.51611328125 2446.739013671875 1543.4952392578125 1267.1517333984375 75656.52353006601 images/11/9f7b7a1f-IMG_0298.JPG [2446, 2720, 1267, 1543]
list(box_dict.items())[0:6]
[('images/11/bdcfba0e-14.jpg', [1993, 2213, 492, 830]),
('images/11/82c7c2ca-11.jpg', [1173, 1363, 744, 895]),
('images/11/7f355d57-2.JPG', [1381, 2331, 1302, 2074]),
('images/11/03865ad2-25.jpg', [2084, 2240, 852, 1066]),
('images/11/a525426b-24.jpg', [2562, 2606, 1044, 1242]),
('images/11/a29d1440-23.jpg', [1293, 1468, 1215, 1364])]
# For the first six images, show the cropped original, the full original,
# the cropped prediction overlay, and the full prediction overlay.
for a, b in list(box_dict.items())[0:6]:
    # a: relative file name; b: [y1, y2, x1, x2] box from get_mask().
    im = cv2.imread("/root/raghav/labelstudio/" + a)
    outputs = predictor(im)
    v = Visualizer(im[:, :, ::-1],
                   scale=1,
                   )
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    # Crop the top box out of both the raw image and the overlay so they
    # can be compared side by side.
    actual = im[b[0]:b[1], b[2]:b[3], :]
    pred = out.get_image()[b[0]:b[1], b[2]:b[3], :]
    fig, ax = plt.subplots(1, 4, figsize=(32, 24))
    ax[0].imshow(actual[:, :, ::-1])  # cropped original (BGR -> RGB)
    ax[2].imshow(pred[:, :, ::-1])    # cropped prediction overlay
    ax[1].imshow(im[:, :, ::-1])      # full original image
    ax[3].imshow(out.get_image())     # full prediction overlay
    print(outputs['instances'])
    ax[0].set_title("original image")  # typo fixed: was "original imag"
    ax[2].set_title("masked Image - contours")
    ax[1].set_title("actual image")
    ax[3].set_title("image pred")
    fig.show()
Instances(num_instances=5, image_height=4032, image_width=1908, fields=[pred_boxes: Boxes(tensor([[ 492.2953, 1993.5338, 830.9369, 2213.1138],
[ 725.5929, 1968.1691, 854.2525, 2078.5449],
[ 476.6273, 2176.9695, 594.9737, 2228.3889],
[ 652.2195, 1976.4672, 850.7067, 2128.5671],
[ 474.9111, 2165.6458, 605.6243, 2207.3381]], device='cuda:0')), scores: tensor([0.9918, 0.9413, 0.9237, 0.6282, 0.5230], device='cuda:0'), pred_classes: tensor([0, 0, 0, 0, 0], device='cuda:0'), pred_masks: tensor([[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]],
[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]],
[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]],
[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]],
[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]]], device='cuda:0')])
Instances(num_instances=6, image_height=2730, image_width=1535, fields=[pred_boxes: Boxes(tensor([[ 744.4111, 1173.4419, 895.8939, 1363.9158],
[ 433.5776, 1218.7476, 891.2090, 1562.3873],
[ 659.9612, 1296.2074, 792.5440, 1461.4200],
[ 434.7644, 1445.7448, 739.1277, 1537.2412],
[ 663.5048, 1208.4712, 871.9982, 1447.9796],
[ 417.7054, 1490.4789, 641.8448, 1538.8746]], device='cuda:0')), scores: tensor([0.9799, 0.9729, 0.7588, 0.6976, 0.6229, 0.5114], device='cuda:0'), pred_classes: tensor([0, 0, 0, 0, 0, 0], device='cuda:0'), pred_masks: tensor([[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]],
[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]],
[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]],
[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]],
[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]],
[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]]], device='cuda:0')])
Instances(num_instances=2, image_height=4032, image_width=3024, fields=[pred_boxes: Boxes(tensor([[1302.9000, 1381.3143, 2074.7046, 2331.9390],
[1632.3135, 1363.0688, 2009.4152, 1898.0432]], device='cuda:0')), scores: tensor([0.9825, 0.9427], device='cuda:0'), pred_classes: tensor([0, 0], device='cuda:0'), pred_masks: tensor([[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]],
[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]]], device='cuda:0')])
Instances(num_instances=3, image_height=4032, image_width=1960, fields=[pred_boxes: Boxes(tensor([[ 852.8512, 2084.6287, 1066.9971, 2240.3933],
[1000.0120, 2069.0386, 1080.1000, 2121.9392],
[ 839.3368, 2158.1443, 967.0345, 2258.1899]], device='cuda:0')), scores: tensor([0.9907, 0.9373, 0.9294], device='cuda:0'), pred_classes: tensor([0, 0, 0], device='cuda:0'), pred_masks: tensor([[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]],
[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]],
[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]]], device='cuda:0')])
Instances(num_instances=1, image_height=4032, image_width=3024, fields=[pred_boxes: Boxes(tensor([[1044.8633, 2562.6719, 1242.3629, 2606.3293]], device='cuda:0')), scores: tensor([0.5531], device='cuda:0'), pred_classes: tensor([0], device='cuda:0'), pred_masks: tensor([[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]]], device='cuda:0')])
Instances(num_instances=5, image_height=4032, image_width=3024, fields=[pred_boxes: Boxes(tensor([[1215.6086, 1293.2350, 1364.9662, 1468.1954],
[1054.0917, 1463.2124, 1256.2151, 1563.3062],
[1053.2366, 1317.3365, 1361.6282, 1544.8196],
[1268.5144, 1283.4891, 1364.2344, 1413.8016],
[1045.9758, 1496.9252, 1218.1167, 1571.3212]], device='cuda:0')), scores: tensor([0.9762, 0.9700, 0.8129, 0.6798, 0.6142], device='cuda:0'), pred_classes: tensor([0, 0, 0, 0, 0], device='cuda:0'), pred_masks: tensor([[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]],
[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]],
[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]],
[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]],
[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]]], device='cuda:0')])
outputs['instances']
Instances(num_instances=5, image_height=4032, image_width=3024, fields=[pred_boxes: Boxes(tensor([[1215.6086, 1293.2350, 1364.9662, 1468.1954],
[1054.0917, 1463.2124, 1256.2151, 1563.3062],
[1053.2366, 1317.3365, 1361.6282, 1544.8196],
[1268.5144, 1283.4891, 1364.2344, 1413.8016],
[1045.9758, 1496.9252, 1218.1167, 1571.3212]], device='cuda:0')), scores: tensor([0.9762, 0.9700, 0.8129, 0.6798, 0.6142], device='cuda:0'), pred_classes: tensor([0, 0, 0, 0, 0], device='cuda:0'), pred_masks: tensor([[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]],
[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]],
[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]],
[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]],
[[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
...,
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]]], device='cuda:0')])